package com.ruoyi.common.utils.html;

import com.ruoyi.common.utils.ExceptionUtil;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * @auther fcb
 * description：
 * date: 2025/5/3 14:52
 */
@Slf4j
public class JsoupUtils {

    /**
     * 请求URL获取文档对象
     * @param url
     * @return
     */
    public static Document getDocumentFromURL(String url) {
        try {
            return Jsoup.connect(url)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
                    .timeout(10000)
                    .get();
        } catch (Exception e) {
            log.error("获取html文档失败：{}", ExceptionUtil.getExceptionMessage(e));
        }
        return null;
    }

    /**
     * 将HTML代码保存为文件
     * @param html
     * @param htmlPath
     */
    public static boolean saveHtml(String html, Path htmlPath) {
        try (OutputStream outputStream = Files.newOutputStream(htmlPath)
        ) {
            outputStream.write(html.getBytes());
            return true;
        } catch (IOException e) {
            log.error("保存html文档失败：{}", ExceptionUtil.getExceptionMessage(e));
            return false;
        }
    }

}
